{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "plt.rcParams['figure.figsize']=(15,8)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 283,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 从文件中载入数据\n",
    "df_xtrain = pd.read_table('14cancer.xtrain',sep='\\s+',header=None, encoding='utf-8')\n",
    "df_ytrain = pd.read_table('14cancer.ytrain',sep='\\s+',header=None, encoding='utf-8')\n",
    "df_xtest = pd.read_table('14cancer.xtest',sep='\\s+',header=None, encoding='utf-8')\n",
    "df_ytest = pd.read_table('14cancer.ytest',sep='\\s+',header=None, encoding='utf-8')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 284,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(2308, 63) (1, 63) (2308, 25) (1, 25)\n"
     ]
    }
   ],
   "source": [
    "# 查看数据的形状\n",
    "# shape=(n_genes,n_samples),shape=(1,n_class)\n",
    "print(df_xtrain.shape,df_ytrain.shape,df_xtest.shape,df_ytest.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 285,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "      <th>5</th>\n",
       "      <th>6</th>\n",
       "      <th>7</th>\n",
       "      <th>8</th>\n",
       "      <th>9</th>\n",
       "      <th>...</th>\n",
       "      <th>53</th>\n",
       "      <th>54</th>\n",
       "      <th>55</th>\n",
       "      <th>56</th>\n",
       "      <th>57</th>\n",
       "      <th>58</th>\n",
       "      <th>59</th>\n",
       "      <th>60</th>\n",
       "      <th>61</th>\n",
       "      <th>62</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.773344</td>\n",
       "      <td>-0.078178</td>\n",
       "      <td>-0.084469</td>\n",
       "      <td>0.965614</td>\n",
       "      <td>0.075664</td>\n",
       "      <td>0.458816</td>\n",
       "      <td>0.067098</td>\n",
       "      <td>0.094128</td>\n",
       "      <td>0.108316</td>\n",
       "      <td>-0.970747</td>\n",
       "      <td>...</td>\n",
       "      <td>0.242476</td>\n",
       "      <td>0.533800</td>\n",
       "      <td>-1.940027</td>\n",
       "      <td>-1.051824</td>\n",
       "      <td>-0.968637</td>\n",
       "      <td>-2.683846</td>\n",
       "      <td>-1.207646</td>\n",
       "      <td>-1.684161</td>\n",
       "      <td>-2.258568</td>\n",
       "      <td>-1.146333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>-2.438405</td>\n",
       "      <td>-2.415754</td>\n",
       "      <td>-1.649739</td>\n",
       "      <td>-2.380547</td>\n",
       "      <td>-1.728785</td>\n",
       "      <td>-2.875286</td>\n",
       "      <td>-1.624044</td>\n",
       "      <td>-1.795165</td>\n",
       "      <td>-1.944911</td>\n",
       "      <td>-2.347582</td>\n",
       "      <td>...</td>\n",
       "      <td>-2.859455</td>\n",
       "      <td>-1.529241</td>\n",
       "      <td>-2.877061</td>\n",
       "      <td>-2.887775</td>\n",
       "      <td>-2.748872</td>\n",
       "      <td>-2.117767</td>\n",
       "      <td>-2.478130</td>\n",
       "      <td>-2.053384</td>\n",
       "      <td>-2.308603</td>\n",
       "      <td>-3.007805</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>-0.482562</td>\n",
       "      <td>0.412772</td>\n",
       "      <td>-0.241307</td>\n",
       "      <td>0.625297</td>\n",
       "      <td>0.852626</td>\n",
       "      <td>0.135841</td>\n",
       "      <td>0.519627</td>\n",
       "      <td>0.702751</td>\n",
       "      <td>0.600099</td>\n",
       "      <td>-0.392006</td>\n",
       "      <td>...</td>\n",
       "      <td>0.260362</td>\n",
       "      <td>0.436059</td>\n",
       "      <td>0.071297</td>\n",
       "      <td>0.640853</td>\n",
       "      <td>0.333683</td>\n",
       "      <td>-0.523236</td>\n",
       "      <td>0.094310</td>\n",
       "      <td>0.563835</td>\n",
       "      <td>-1.443076</td>\n",
       "      <td>-0.029326</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>-2.721135</td>\n",
       "      <td>-2.825146</td>\n",
       "      <td>-2.875286</td>\n",
       "      <td>-1.741256</td>\n",
       "      <td>0.272695</td>\n",
       "      <td>0.405398</td>\n",
       "      <td>0.238229</td>\n",
       "      <td>0.206038</td>\n",
       "      <td>-0.051083</td>\n",
       "      <td>-0.141218</td>\n",
       "      <td>...</td>\n",
       "      <td>0.361234</td>\n",
       "      <td>0.583779</td>\n",
       "      <td>-2.121932</td>\n",
       "      <td>-2.085057</td>\n",
       "      <td>-1.144133</td>\n",
       "      <td>-2.174192</td>\n",
       "      <td>0.273456</td>\n",
       "      <td>0.314446</td>\n",
       "      <td>0.233094</td>\n",
       "      <td>0.237835</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>-1.217058</td>\n",
       "      <td>-0.626236</td>\n",
       "      <td>-0.889405</td>\n",
       "      <td>-0.845366</td>\n",
       "      <td>-1.841370</td>\n",
       "      <td>-2.082647</td>\n",
       "      <td>-1.397558</td>\n",
       "      <td>-1.868209</td>\n",
       "      <td>-1.981952</td>\n",
       "      <td>-1.823250</td>\n",
       "      <td>...</td>\n",
       "      <td>-2.697110</td>\n",
       "      <td>-1.648179</td>\n",
       "      <td>-0.932674</td>\n",
       "      <td>-1.087079</td>\n",
       "      <td>-0.772190</td>\n",
       "      <td>-0.448947</td>\n",
       "      <td>-1.113218</td>\n",
       "      <td>-2.052605</td>\n",
       "      <td>-1.779633</td>\n",
       "      <td>-2.859455</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 63 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         0         1         2         3         4         5         6   \\\n",
       "0  0.773344 -0.078178 -0.084469  0.965614  0.075664  0.458816  0.067098   \n",
       "1 -2.438405 -2.415754 -1.649739 -2.380547 -1.728785 -2.875286 -1.624044   \n",
       "2 -0.482562  0.412772 -0.241307  0.625297  0.852626  0.135841  0.519627   \n",
       "3 -2.721135 -2.825146 -2.875286 -1.741256  0.272695  0.405398  0.238229   \n",
       "4 -1.217058 -0.626236 -0.889405 -0.845366 -1.841370 -2.082647 -1.397558   \n",
       "\n",
       "         7         8         9     ...           53        54        55  \\\n",
       "0  0.094128  0.108316 -0.970747    ...     0.242476  0.533800 -1.940027   \n",
       "1 -1.795165 -1.944911 -2.347582    ...    -2.859455 -1.529241 -2.877061   \n",
       "2  0.702751  0.600099 -0.392006    ...     0.260362  0.436059  0.071297   \n",
       "3  0.206038 -0.051083 -0.141218    ...     0.361234  0.583779 -2.121932   \n",
       "4 -1.868209 -1.981952 -1.823250    ...    -2.697110 -1.648179 -0.932674   \n",
       "\n",
       "         56        57        58        59        60        61        62  \n",
       "0 -1.051824 -0.968637 -2.683846 -1.207646 -1.684161 -2.258568 -1.146333  \n",
       "1 -2.887775 -2.748872 -2.117767 -2.478130 -2.053384 -2.308603 -3.007805  \n",
       "2  0.640853  0.333683 -0.523236  0.094310  0.563835 -1.443076 -0.029326  \n",
       "3 -2.085057 -1.144133 -2.174192  0.273456  0.314446  0.233094  0.237835  \n",
       "4 -1.087079 -0.772190 -0.448947 -1.113218 -2.052605 -1.779633 -2.859455  \n",
       "\n",
       "[5 rows x 63 columns]"
      ]
     },
     "execution_count": 285,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 对数据进行预览\n",
    "df_xtrain.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 286,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 对数据进行初步清洗，消除nan值\n",
    "def drop_df_nan(df_x,df_y): \n",
    "    # x: dataframe shape=(m,n)   n: the number of samples\n",
    "    # y: dataframe shape=(1,n)\n",
    "    # 删除样本中那些包含nan值的样本（列）\n",
    "    nan_x_index = np.where(np.isnan(df_x))   # 返回np.nan值所在的行和列 （rows_index，columns_index）\n",
    "    nan_y_index = np.where(np.isnan(df_y)) \n",
    "    nan_rows = np.union1d(nan_x_index[0],nan_y_index[0])   # 求取两个并集\n",
    "    nan_cols = np.union1d(nan_x_index[1],nan_y_index[1])\n",
    "    #print(list(df_x.columns[nan_cols]),type(list(df_x.columns[nan_cols])))\n",
    "    if len(nan_cols)!= 0:              # 如果返回的列索引值非空，即存在某些列存在np.nan值，我们进行删除,注意drop不改变元对象\n",
    "        df_x=df_x.drop(columns=nan_cols)\n",
    "        df_y=df_y.drop(columns=nan_cols)\n",
    "    return df_x,df_y\n",
    "df_xtrain,df_ytrain = drop_df_nan(df_xtrain, df_ytrain)\n",
    "df_xtest, df_ytest = drop_df_nan(df_xtest, df_ytest)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 287,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 按照机器学习中的常规习惯，我们将数据进行转置，使shape=(n_samples,d)\n",
    "df_xtrain = df_xtrain.T\n",
    "df_xtest = df_xtest.T"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 288,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "      <th>5</th>\n",
       "      <th>6</th>\n",
       "      <th>7</th>\n",
       "      <th>8</th>\n",
       "      <th>9</th>\n",
       "      <th>...</th>\n",
       "      <th>2298</th>\n",
       "      <th>2299</th>\n",
       "      <th>2300</th>\n",
       "      <th>2301</th>\n",
       "      <th>2302</th>\n",
       "      <th>2303</th>\n",
       "      <th>2304</th>\n",
       "      <th>2305</th>\n",
       "      <th>2306</th>\n",
       "      <th>2307</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.773344</td>\n",
       "      <td>-2.438405</td>\n",
       "      <td>-0.482562</td>\n",
       "      <td>-2.721135</td>\n",
       "      <td>-1.217058</td>\n",
       "      <td>0.827809</td>\n",
       "      <td>1.342604</td>\n",
       "      <td>0.057042</td>\n",
       "      <td>0.133569</td>\n",
       "      <td>0.565427</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.238511</td>\n",
       "      <td>-0.027474</td>\n",
       "      <td>-1.660205</td>\n",
       "      <td>0.588231</td>\n",
       "      <td>-0.463624</td>\n",
       "      <td>-3.952845</td>\n",
       "      <td>-5.496768</td>\n",
       "      <td>-1.414282</td>\n",
       "      <td>-0.647600</td>\n",
       "      <td>-1.763172</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>-0.078178</td>\n",
       "      <td>-2.415754</td>\n",
       "      <td>0.412772</td>\n",
       "      <td>-2.825146</td>\n",
       "      <td>-0.626236</td>\n",
       "      <td>0.054488</td>\n",
       "      <td>1.429498</td>\n",
       "      <td>-0.120249</td>\n",
       "      <td>0.456792</td>\n",
       "      <td>0.159053</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.657394</td>\n",
       "      <td>-0.246284</td>\n",
       "      <td>-0.836325</td>\n",
       "      <td>-0.571284</td>\n",
       "      <td>0.034788</td>\n",
       "      <td>-2.478130</td>\n",
       "      <td>-3.661264</td>\n",
       "      <td>-1.093923</td>\n",
       "      <td>-1.209320</td>\n",
       "      <td>-0.824395</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>-0.084469</td>\n",
       "      <td>-1.649739</td>\n",
       "      <td>-0.241307</td>\n",
       "      <td>-2.875286</td>\n",
       "      <td>-0.889405</td>\n",
       "      <td>-0.027474</td>\n",
       "      <td>1.159300</td>\n",
       "      <td>0.015676</td>\n",
       "      <td>0.191942</td>\n",
       "      <td>0.496585</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.696352</td>\n",
       "      <td>0.024985</td>\n",
       "      <td>-1.059872</td>\n",
       "      <td>-0.403767</td>\n",
       "      <td>-0.678653</td>\n",
       "      <td>-2.939352</td>\n",
       "      <td>-2.736450</td>\n",
       "      <td>-1.965399</td>\n",
       "      <td>-0.805868</td>\n",
       "      <td>-1.139434</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.965614</td>\n",
       "      <td>-2.380547</td>\n",
       "      <td>0.625297</td>\n",
       "      <td>-1.741256</td>\n",
       "      <td>-0.845366</td>\n",
       "      <td>0.949687</td>\n",
       "      <td>1.093801</td>\n",
       "      <td>0.819736</td>\n",
       "      <td>-0.284620</td>\n",
       "      <td>0.994732</td>\n",
       "      <td>...</td>\n",
       "      <td>0.259746</td>\n",
       "      <td>0.357115</td>\n",
       "      <td>-1.893128</td>\n",
       "      <td>0.255107</td>\n",
       "      <td>0.163309</td>\n",
       "      <td>-1.021929</td>\n",
       "      <td>-2.077843</td>\n",
       "      <td>-1.127629</td>\n",
       "      <td>0.331531</td>\n",
       "      <td>-2.179483</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.075664</td>\n",
       "      <td>-1.728785</td>\n",
       "      <td>0.852626</td>\n",
       "      <td>0.272695</td>\n",
       "      <td>-1.841370</td>\n",
       "      <td>0.327936</td>\n",
       "      <td>1.251219</td>\n",
       "      <td>0.771450</td>\n",
       "      <td>0.030917</td>\n",
       "      <td>0.278313</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.200404</td>\n",
       "      <td>0.061753</td>\n",
       "      <td>-2.273998</td>\n",
       "      <td>-0.039365</td>\n",
       "      <td>0.368801</td>\n",
       "      <td>-2.566551</td>\n",
       "      <td>-1.675044</td>\n",
       "      <td>-1.082050</td>\n",
       "      <td>-0.965218</td>\n",
       "      <td>-1.836966</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 2308 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       0         1         2         3         4         5         6     \\\n",
       "0  0.773344 -2.438405 -0.482562 -2.721135 -1.217058  0.827809  1.342604   \n",
       "1 -0.078178 -2.415754  0.412772 -2.825146 -0.626236  0.054488  1.429498   \n",
       "2 -0.084469 -1.649739 -0.241307 -2.875286 -0.889405 -0.027474  1.159300   \n",
       "3  0.965614 -2.380547  0.625297 -1.741256 -0.845366  0.949687  1.093801   \n",
       "4  0.075664 -1.728785  0.852626  0.272695 -1.841370  0.327936  1.251219   \n",
       "\n",
       "       7         8         9       ...         2298      2299      2300  \\\n",
       "0  0.057042  0.133569  0.565427    ...    -0.238511 -0.027474 -1.660205   \n",
       "1 -0.120249  0.456792  0.159053    ...    -0.657394 -0.246284 -0.836325   \n",
       "2  0.015676  0.191942  0.496585    ...    -0.696352  0.024985 -1.059872   \n",
       "3  0.819736 -0.284620  0.994732    ...     0.259746  0.357115 -1.893128   \n",
       "4  0.771450  0.030917  0.278313    ...    -0.200404  0.061753 -2.273998   \n",
       "\n",
       "       2301      2302      2303      2304      2305      2306      2307  \n",
       "0  0.588231 -0.463624 -3.952845 -5.496768 -1.414282 -0.647600 -1.763172  \n",
       "1 -0.571284  0.034788 -2.478130 -3.661264 -1.093923 -1.209320 -0.824395  \n",
       "2 -0.403767 -0.678653 -2.939352 -2.736450 -1.965399 -0.805868 -1.139434  \n",
       "3  0.255107  0.163309 -1.021929 -2.077843 -1.127629  0.331531 -2.179483  \n",
       "4 -0.039365  0.368801 -2.566551 -1.675044 -1.082050 -0.965218 -1.836966  \n",
       "\n",
       "[5 rows x 2308 columns]"
      ]
     },
     "execution_count": 288,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 观察调整后的数据\n",
    "df_xtrain.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 289,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "      <th>5</th>\n",
       "      <th>6</th>\n",
       "      <th>7</th>\n",
       "      <th>8</th>\n",
       "      <th>9</th>\n",
       "      <th>...</th>\n",
       "      <th>2298</th>\n",
       "      <th>2299</th>\n",
       "      <th>2300</th>\n",
       "      <th>2301</th>\n",
       "      <th>2302</th>\n",
       "      <th>2303</th>\n",
       "      <th>2304</th>\n",
       "      <th>2305</th>\n",
       "      <th>2306</th>\n",
       "      <th>2307</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.139501</td>\n",
       "      <td>-1.168927</td>\n",
       "      <td>0.564973</td>\n",
       "      <td>-3.366796</td>\n",
       "      <td>-1.323132</td>\n",
       "      <td>-0.692547</td>\n",
       "      <td>2.327395</td>\n",
       "      <td>0.923703</td>\n",
       "      <td>0.112167</td>\n",
       "      <td>0.509765</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.180803</td>\n",
       "      <td>-0.942635</td>\n",
       "      <td>-1.210662</td>\n",
       "      <td>-0.588787</td>\n",
       "      <td>-0.070422</td>\n",
       "      <td>-2.783852</td>\n",
       "      <td>-2.840439</td>\n",
       "      <td>-1.160913</td>\n",
       "      <td>-0.343054</td>\n",
       "      <td>-0.055513</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.164275</td>\n",
       "      <td>-2.018158</td>\n",
       "      <td>1.103533</td>\n",
       "      <td>-2.165435</td>\n",
       "      <td>-1.440117</td>\n",
       "      <td>-0.437420</td>\n",
       "      <td>2.661587</td>\n",
       "      <td>1.224011</td>\n",
       "      <td>0.210504</td>\n",
       "      <td>1.045563</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.709480</td>\n",
       "      <td>-1.532940</td>\n",
       "      <td>-2.385967</td>\n",
       "      <td>-0.389641</td>\n",
       "      <td>0.422781</td>\n",
       "      <td>-2.816750</td>\n",
       "      <td>-2.422495</td>\n",
       "      <td>-1.722607</td>\n",
       "      <td>-1.703749</td>\n",
       "      <td>-1.699910</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.841093</td>\n",
       "      <td>0.254720</td>\n",
       "      <td>-0.208748</td>\n",
       "      <td>-2.148149</td>\n",
       "      <td>-1.512765</td>\n",
       "      <td>-1.263723</td>\n",
       "      <td>2.946642</td>\n",
       "      <td>0.087828</td>\n",
       "      <td>0.482920</td>\n",
       "      <td>1.063020</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.067958</td>\n",
       "      <td>-1.854060</td>\n",
       "      <td>-1.541312</td>\n",
       "      <td>-1.773723</td>\n",
       "      <td>-1.879935</td>\n",
       "      <td>-2.265289</td>\n",
       "      <td>-2.405726</td>\n",
       "      <td>-0.176379</td>\n",
       "      <td>-0.128743</td>\n",
       "      <td>-0.996417</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0.685065</td>\n",
       "      <td>-1.927579</td>\n",
       "      <td>-0.233068</td>\n",
       "      <td>-1.640413</td>\n",
       "      <td>-1.008954</td>\n",
       "      <td>0.774451</td>\n",
       "      <td>1.617168</td>\n",
       "      <td>-0.567925</td>\n",
       "      <td>0.036621</td>\n",
       "      <td>-0.101701</td>\n",
       "      <td>...</td>\n",
       "      <td>1.077559</td>\n",
       "      <td>-0.263966</td>\n",
       "      <td>-1.966113</td>\n",
       "      <td>-1.086190</td>\n",
       "      <td>0.885914</td>\n",
       "      <td>-0.248590</td>\n",
       "      <td>0.385874</td>\n",
       "      <td>-0.508163</td>\n",
       "      <td>-0.626985</td>\n",
       "      <td>-0.699366</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>-1.956163</td>\n",
       "      <td>-2.234926</td>\n",
       "      <td>0.281563</td>\n",
       "      <td>-2.695628</td>\n",
       "      <td>-1.214697</td>\n",
       "      <td>-1.059872</td>\n",
       "      <td>2.498070</td>\n",
       "      <td>0.780196</td>\n",
       "      <td>1.041583</td>\n",
       "      <td>0.727500</td>\n",
       "      <td>...</td>\n",
       "      <td>-1.209320</td>\n",
       "      <td>-0.693147</td>\n",
       "      <td>-1.846427</td>\n",
       "      <td>-0.993442</td>\n",
       "      <td>-3.294138</td>\n",
       "      <td>-3.332605</td>\n",
       "      <td>-2.282782</td>\n",
       "      <td>-0.656622</td>\n",
       "      <td>-2.012157</td>\n",
       "      <td>-1.668657</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 2308 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       0         1         2         3         4         5         6     \\\n",
       "0  0.139501 -1.168927  0.564973 -3.366796 -1.323132 -0.692547  2.327395   \n",
       "1  1.164275 -2.018158  1.103533 -2.165435 -1.440117 -0.437420  2.661587   \n",
       "3  0.841093  0.254720 -0.208748 -2.148149 -1.512765 -1.263723  2.946642   \n",
       "5  0.685065 -1.927579 -0.233068 -1.640413 -1.008954  0.774451  1.617168   \n",
       "6 -1.956163 -2.234926  0.281563 -2.695628 -1.214697 -1.059872  2.498070   \n",
       "\n",
       "       7         8         9       ...         2298      2299      2300  \\\n",
       "0  0.923703  0.112167  0.509765    ...    -0.180803 -0.942635 -1.210662   \n",
       "1  1.224011  0.210504  1.045563    ...    -0.709480 -1.532940 -2.385967   \n",
       "3  0.087828  0.482920  1.063020    ...    -0.067958 -1.854060 -1.541312   \n",
       "5 -0.567925  0.036621 -0.101701    ...     1.077559 -0.263966 -1.966113   \n",
       "6  0.780196  1.041583  0.727500    ...    -1.209320 -0.693147 -1.846427   \n",
       "\n",
       "       2301      2302      2303      2304      2305      2306      2307  \n",
       "0 -0.588787 -0.070422 -2.783852 -2.840439 -1.160913 -0.343054 -0.055513  \n",
       "1 -0.389641  0.422781 -2.816750 -2.422495 -1.722607 -1.703749 -1.699910  \n",
       "3 -1.773723 -1.879935 -2.265289 -2.405726 -0.176379 -0.128743 -0.996417  \n",
       "5 -1.086190  0.885914 -0.248590  0.385874 -0.508163 -0.626985 -0.699366  \n",
       "6 -0.993442 -3.294138 -3.332605 -2.282782 -0.656622 -2.012157 -1.668657  \n",
       "\n",
       "[5 rows x 2308 columns]"
      ]
     },
     "execution_count": 289,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_xtest.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 290,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 将每个样本的类别添加到dataframe的第1列\n",
    "df_xtrain.insert(0,'Type',df_ytrain.T)\n",
    "df_xtest.insert(0,'Type',df_ytest.T)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 291,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Type</th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "      <th>5</th>\n",
       "      <th>6</th>\n",
       "      <th>7</th>\n",
       "      <th>8</th>\n",
       "      <th>...</th>\n",
       "      <th>2298</th>\n",
       "      <th>2299</th>\n",
       "      <th>2300</th>\n",
       "      <th>2301</th>\n",
       "      <th>2302</th>\n",
       "      <th>2303</th>\n",
       "      <th>2304</th>\n",
       "      <th>2305</th>\n",
       "      <th>2306</th>\n",
       "      <th>2307</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2</td>\n",
       "      <td>0.773344</td>\n",
       "      <td>-2.438405</td>\n",
       "      <td>-0.482562</td>\n",
       "      <td>-2.721135</td>\n",
       "      <td>-1.217058</td>\n",
       "      <td>0.827809</td>\n",
       "      <td>1.342604</td>\n",
       "      <td>0.057042</td>\n",
       "      <td>0.133569</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.238511</td>\n",
       "      <td>-0.027474</td>\n",
       "      <td>-1.660205</td>\n",
       "      <td>0.588231</td>\n",
       "      <td>-0.463624</td>\n",
       "      <td>-3.952845</td>\n",
       "      <td>-5.496768</td>\n",
       "      <td>-1.414282</td>\n",
       "      <td>-0.647600</td>\n",
       "      <td>-1.763172</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>-0.078178</td>\n",
       "      <td>-2.415754</td>\n",
       "      <td>0.412772</td>\n",
       "      <td>-2.825146</td>\n",
       "      <td>-0.626236</td>\n",
       "      <td>0.054488</td>\n",
       "      <td>1.429498</td>\n",
       "      <td>-0.120249</td>\n",
       "      <td>0.456792</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.657394</td>\n",
       "      <td>-0.246284</td>\n",
       "      <td>-0.836325</td>\n",
       "      <td>-0.571284</td>\n",
       "      <td>0.034788</td>\n",
       "      <td>-2.478130</td>\n",
       "      <td>-3.661264</td>\n",
       "      <td>-1.093923</td>\n",
       "      <td>-1.209320</td>\n",
       "      <td>-0.824395</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>-0.084469</td>\n",
       "      <td>-1.649739</td>\n",
       "      <td>-0.241307</td>\n",
       "      <td>-2.875286</td>\n",
       "      <td>-0.889405</td>\n",
       "      <td>-0.027474</td>\n",
       "      <td>1.159300</td>\n",
       "      <td>0.015676</td>\n",
       "      <td>0.191942</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.696352</td>\n",
       "      <td>0.024985</td>\n",
       "      <td>-1.059872</td>\n",
       "      <td>-0.403767</td>\n",
       "      <td>-0.678653</td>\n",
       "      <td>-2.939352</td>\n",
       "      <td>-2.736450</td>\n",
       "      <td>-1.965399</td>\n",
       "      <td>-0.805868</td>\n",
       "      <td>-1.139434</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2</td>\n",
       "      <td>0.965614</td>\n",
       "      <td>-2.380547</td>\n",
       "      <td>0.625297</td>\n",
       "      <td>-1.741256</td>\n",
       "      <td>-0.845366</td>\n",
       "      <td>0.949687</td>\n",
       "      <td>1.093801</td>\n",
       "      <td>0.819736</td>\n",
       "      <td>-0.284620</td>\n",
       "      <td>...</td>\n",
       "      <td>0.259746</td>\n",
       "      <td>0.357115</td>\n",
       "      <td>-1.893128</td>\n",
       "      <td>0.255107</td>\n",
       "      <td>0.163309</td>\n",
       "      <td>-1.021929</td>\n",
       "      <td>-2.077843</td>\n",
       "      <td>-1.127629</td>\n",
       "      <td>0.331531</td>\n",
       "      <td>-2.179483</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2</td>\n",
       "      <td>0.075664</td>\n",
       "      <td>-1.728785</td>\n",
       "      <td>0.852626</td>\n",
       "      <td>0.272695</td>\n",
       "      <td>-1.841370</td>\n",
       "      <td>0.327936</td>\n",
       "      <td>1.251219</td>\n",
       "      <td>0.771450</td>\n",
       "      <td>0.030917</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.200404</td>\n",
       "      <td>0.061753</td>\n",
       "      <td>-2.273998</td>\n",
       "      <td>-0.039365</td>\n",
       "      <td>0.368801</td>\n",
       "      <td>-2.566551</td>\n",
       "      <td>-1.675044</td>\n",
       "      <td>-1.082050</td>\n",
       "      <td>-0.965218</td>\n",
       "      <td>-1.836966</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 2309 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   Type         0         1         2         3         4         5         6  \\\n",
       "0     2  0.773344 -2.438405 -0.482562 -2.721135 -1.217058  0.827809  1.342604   \n",
       "1     2 -0.078178 -2.415754  0.412772 -2.825146 -0.626236  0.054488  1.429498   \n",
       "2     2 -0.084469 -1.649739 -0.241307 -2.875286 -0.889405 -0.027474  1.159300   \n",
       "3     2  0.965614 -2.380547  0.625297 -1.741256 -0.845366  0.949687  1.093801   \n",
       "4     2  0.075664 -1.728785  0.852626  0.272695 -1.841370  0.327936  1.251219   \n",
       "\n",
       "          7         8    ...         2298      2299      2300      2301  \\\n",
       "0  0.057042  0.133569    ...    -0.238511 -0.027474 -1.660205  0.588231   \n",
       "1 -0.120249  0.456792    ...    -0.657394 -0.246284 -0.836325 -0.571284   \n",
       "2  0.015676  0.191942    ...    -0.696352  0.024985 -1.059872 -0.403767   \n",
       "3  0.819736 -0.284620    ...     0.259746  0.357115 -1.893128  0.255107   \n",
       "4  0.771450  0.030917    ...    -0.200404  0.061753 -2.273998 -0.039365   \n",
       "\n",
       "       2302      2303      2304      2305      2306      2307  \n",
       "0 -0.463624 -3.952845 -5.496768 -1.414282 -0.647600 -1.763172  \n",
       "1  0.034788 -2.478130 -3.661264 -1.093923 -1.209320 -0.824395  \n",
       "2 -0.678653 -2.939352 -2.736450 -1.965399 -0.805868 -1.139434  \n",
       "3  0.163309 -1.021929 -2.077843 -1.127629  0.331531 -2.179483  \n",
       "4  0.368801 -2.566551 -1.675044 -1.082050 -0.965218 -1.836966  \n",
       "\n",
       "[5 rows x 2309 columns]"
      ]
     },
     "execution_count": 291,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_xtrain.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 292,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Type</th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "      <th>5</th>\n",
       "      <th>6</th>\n",
       "      <th>7</th>\n",
       "      <th>8</th>\n",
       "      <th>...</th>\n",
       "      <th>2298</th>\n",
       "      <th>2299</th>\n",
       "      <th>2300</th>\n",
       "      <th>2301</th>\n",
       "      <th>2302</th>\n",
       "      <th>2303</th>\n",
       "      <th>2304</th>\n",
       "      <th>2305</th>\n",
       "      <th>2306</th>\n",
       "      <th>2307</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2</td>\n",
       "      <td>0.773344</td>\n",
       "      <td>-2.438405</td>\n",
       "      <td>-0.482562</td>\n",
       "      <td>-2.721135</td>\n",
       "      <td>-1.217058</td>\n",
       "      <td>0.827809</td>\n",
       "      <td>1.342604</td>\n",
       "      <td>0.057042</td>\n",
       "      <td>0.133569</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.238511</td>\n",
       "      <td>-0.027474</td>\n",
       "      <td>-1.660205</td>\n",
       "      <td>0.588231</td>\n",
       "      <td>-0.463624</td>\n",
       "      <td>-3.952845</td>\n",
       "      <td>-5.496768</td>\n",
       "      <td>-1.414282</td>\n",
       "      <td>-0.647600</td>\n",
       "      <td>-1.763172</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>-0.078178</td>\n",
       "      <td>-2.415754</td>\n",
       "      <td>0.412772</td>\n",
       "      <td>-2.825146</td>\n",
       "      <td>-0.626236</td>\n",
       "      <td>0.054488</td>\n",
       "      <td>1.429498</td>\n",
       "      <td>-0.120249</td>\n",
       "      <td>0.456792</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.657394</td>\n",
       "      <td>-0.246284</td>\n",
       "      <td>-0.836325</td>\n",
       "      <td>-0.571284</td>\n",
       "      <td>0.034788</td>\n",
       "      <td>-2.478130</td>\n",
       "      <td>-3.661264</td>\n",
       "      <td>-1.093923</td>\n",
       "      <td>-1.209320</td>\n",
       "      <td>-0.824395</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>-0.084469</td>\n",
       "      <td>-1.649739</td>\n",
       "      <td>-0.241307</td>\n",
       "      <td>-2.875286</td>\n",
       "      <td>-0.889405</td>\n",
       "      <td>-0.027474</td>\n",
       "      <td>1.159300</td>\n",
       "      <td>0.015676</td>\n",
       "      <td>0.191942</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.696352</td>\n",
       "      <td>0.024985</td>\n",
       "      <td>-1.059872</td>\n",
       "      <td>-0.403767</td>\n",
       "      <td>-0.678653</td>\n",
       "      <td>-2.939352</td>\n",
       "      <td>-2.736450</td>\n",
       "      <td>-1.965399</td>\n",
       "      <td>-0.805868</td>\n",
       "      <td>-1.139434</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2</td>\n",
       "      <td>0.965614</td>\n",
       "      <td>-2.380547</td>\n",
       "      <td>0.625297</td>\n",
       "      <td>-1.741256</td>\n",
       "      <td>-0.845366</td>\n",
       "      <td>0.949687</td>\n",
       "      <td>1.093801</td>\n",
       "      <td>0.819736</td>\n",
       "      <td>-0.284620</td>\n",
       "      <td>...</td>\n",
       "      <td>0.259746</td>\n",
       "      <td>0.357115</td>\n",
       "      <td>-1.893128</td>\n",
       "      <td>0.255107</td>\n",
       "      <td>0.163309</td>\n",
       "      <td>-1.021929</td>\n",
       "      <td>-2.077843</td>\n",
       "      <td>-1.127629</td>\n",
       "      <td>0.331531</td>\n",
       "      <td>-2.179483</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2</td>\n",
       "      <td>0.075664</td>\n",
       "      <td>-1.728785</td>\n",
       "      <td>0.852626</td>\n",
       "      <td>0.272695</td>\n",
       "      <td>-1.841370</td>\n",
       "      <td>0.327936</td>\n",
       "      <td>1.251219</td>\n",
       "      <td>0.771450</td>\n",
       "      <td>0.030917</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.200404</td>\n",
       "      <td>0.061753</td>\n",
       "      <td>-2.273998</td>\n",
       "      <td>-0.039365</td>\n",
       "      <td>0.368801</td>\n",
       "      <td>-2.566551</td>\n",
       "      <td>-1.675044</td>\n",
       "      <td>-1.082050</td>\n",
       "      <td>-0.965218</td>\n",
       "      <td>-1.836966</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 2309 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   Type         0         1         2         3         4         5         6  \\\n",
       "0     2  0.773344 -2.438405 -0.482562 -2.721135 -1.217058  0.827809  1.342604   \n",
       "1     2 -0.078178 -2.415754  0.412772 -2.825146 -0.626236  0.054488  1.429498   \n",
       "2     2 -0.084469 -1.649739 -0.241307 -2.875286 -0.889405 -0.027474  1.159300   \n",
       "3     2  0.965614 -2.380547  0.625297 -1.741256 -0.845366  0.949687  1.093801   \n",
       "4     2  0.075664 -1.728785  0.852626  0.272695 -1.841370  0.327936  1.251219   \n",
       "\n",
       "          7         8    ...         2298      2299      2300      2301  \\\n",
       "0  0.057042  0.133569    ...    -0.238511 -0.027474 -1.660205  0.588231   \n",
       "1 -0.120249  0.456792    ...    -0.657394 -0.246284 -0.836325 -0.571284   \n",
       "2  0.015676  0.191942    ...    -0.696352  0.024985 -1.059872 -0.403767   \n",
       "3  0.819736 -0.284620    ...     0.259746  0.357115 -1.893128  0.255107   \n",
       "4  0.771450  0.030917    ...    -0.200404  0.061753 -2.273998 -0.039365   \n",
       "\n",
       "       2302      2303      2304      2305      2306      2307  \n",
       "0 -0.463624 -3.952845 -5.496768 -1.414282 -0.647600 -1.763172  \n",
       "1  0.034788 -2.478130 -3.661264 -1.093923 -1.209320 -0.824395  \n",
       "2 -0.678653 -2.939352 -2.736450 -1.965399 -0.805868 -1.139434  \n",
       "3  0.163309 -1.021929 -2.077843 -1.127629  0.331531 -2.179483  \n",
       "4  0.368801 -2.566551 -1.675044 -1.082050 -0.965218 -1.836966  \n",
       "\n",
       "[5 rows x 2309 columns]"
      ]
     },
     "execution_count": 292,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first five rows of df_xtrain (n=5 is head()'s default, spelled out here).\n",
    "df_xtrain.head(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
